{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-08-27T03:38:17.003200Z",
     "start_time": "2021-08-27T03:38:15.332820Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-08-27T03:38:26.248579Z",
     "start_time": "2021-08-27T03:38:17.506574Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.\n",
      "Attempting to start a local H2O server...\n",
      "  Java Version: java version \"1.8.0_11\"; Java(TM) SE Runtime Environment (build 1.8.0_11-b12); Java HotSpot(TM) 64-Bit Server VM (build 25.11-b03, mixed mode)\n",
      "  Starting server from /home/caihengxing/anaconda3/lib/python3.7/site-packages/h2o/backend/bin/h2o.jar\n",
      "  Ice root: /tmp/tmpzlyrmlr_\n",
      "  JVM stdout: /tmp/tmpzlyrmlr_/h2o_caihengxing_started_from_python.out\n",
      "  JVM stderr: /tmp/tmpzlyrmlr_/h2o_caihengxing_started_from_python.err\n",
      "  Server is running at http://127.0.0.1:54321\n",
      "Connecting to H2O server at http://127.0.0.1:54321 ... successful.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>H2O_cluster_uptime:</td>\n",
       "<td>02 secs</td></tr>\n",
       "<tr><td>H2O_cluster_timezone:</td>\n",
       "<td>Asia/Shanghai</td></tr>\n",
       "<tr><td>H2O_data_parsing_timezone:</td>\n",
       "<td>UTC</td></tr>\n",
       "<tr><td>H2O_cluster_version:</td>\n",
       "<td>3.32.1.5</td></tr>\n",
       "<tr><td>H2O_cluster_version_age:</td>\n",
       "<td>22 days </td></tr>\n",
       "<tr><td>H2O_cluster_name:</td>\n",
       "<td>H2O_from_python_caihengxing_xvugu6</td></tr>\n",
       "<tr><td>H2O_cluster_total_nodes:</td>\n",
       "<td>1</td></tr>\n",
       "<tr><td>H2O_cluster_free_memory:</td>\n",
       "<td>26.67 Gb</td></tr>\n",
       "<tr><td>H2O_cluster_total_cores:</td>\n",
       "<td>40</td></tr>\n",
       "<tr><td>H2O_cluster_allowed_cores:</td>\n",
       "<td>40</td></tr>\n",
       "<tr><td>H2O_cluster_status:</td>\n",
       "<td>accepting new members, healthy</td></tr>\n",
       "<tr><td>H2O_connection_url:</td>\n",
       "<td>http://127.0.0.1:54321</td></tr>\n",
       "<tr><td>H2O_connection_proxy:</td>\n",
       "<td>{\"http\": null, \"https\": null}</td></tr>\n",
       "<tr><td>H2O_internal_security:</td>\n",
       "<td>False</td></tr>\n",
       "<tr><td>H2O_API_Extensions:</td>\n",
       "<td>Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4</td></tr>\n",
       "<tr><td>Python_version:</td>\n",
       "<td>3.7.3 final</td></tr></table></div>"
      ],
      "text/plain": [
       "--------------------------  ------------------------------------------------------------------\n",
       "H2O_cluster_uptime:         02 secs\n",
       "H2O_cluster_timezone:       Asia/Shanghai\n",
       "H2O_data_parsing_timezone:  UTC\n",
       "H2O_cluster_version:        3.32.1.5\n",
       "H2O_cluster_version_age:    22 days\n",
       "H2O_cluster_name:           H2O_from_python_caihengxing_xvugu6\n",
       "H2O_cluster_total_nodes:    1\n",
       "H2O_cluster_free_memory:    26.67 Gb\n",
       "H2O_cluster_total_cores:    40\n",
       "H2O_cluster_allowed_cores:  40\n",
       "H2O_cluster_status:         accepting new members, healthy\n",
       "H2O_connection_url:         http://127.0.0.1:54321\n",
       "H2O_connection_proxy:       {\"http\": null, \"https\": null}\n",
       "H2O_internal_security:      False\n",
       "H2O_API_Extensions:         Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4\n",
       "Python_version:             3.7.3 final\n",
       "--------------------------  ------------------------------------------------------------------"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import h2o\n",
    "from h2o.automl import H2OAutoML\n",
    "h2o.init()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-08-27T03:38:55.191812Z",
     "start_time": "2021-08-27T03:38:55.188290Z"
    }
   },
   "outputs": [],
   "source": [
    "train_path = '../autox/data/kaggle_house_price/train.csv'\n",
    "test_path = '../autox/data/kaggle_house_price/test.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-08-27T03:38:57.390732Z",
     "start_time": "2021-08-27T03:38:55.386121Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parse progress: |█████████████████████████████████████████████████████████| 100%\n",
      "Parse progress: |█████████████████████████████████████████████████████████| 100%\n"
     ]
    }
   ],
   "source": [
    "# Load data into H2O\n",
    "train = h2o.import_file(train_path)\n",
    "test  = h2o.import_file(test_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-08-27T03:38:57.786521Z",
     "start_time": "2021-08-27T03:38:57.624221Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead>\n",
       "<tr><th style=\"text-align: right;\">  Id</th><th style=\"text-align: right;\">  MSSubClass</th><th>MSZoning  </th><th style=\"text-align: right;\">  LotFrontage</th><th style=\"text-align: right;\">  LotArea</th><th>Street  </th><th>Alley  </th><th>LotShape  </th><th>LandContour  </th><th>Utilities  </th><th>LotConfig  </th><th>LandSlope  </th><th>Neighborhood  </th><th>Condition1  </th><th>Condition2  </th><th>BldgType  </th><th>HouseStyle  </th><th style=\"text-align: right;\">  OverallQual</th><th style=\"text-align: right;\">  OverallCond</th><th style=\"text-align: right;\">  YearBuilt</th><th style=\"text-align: right;\">  YearRemodAdd</th><th>RoofStyle  </th><th>RoofMatl  </th><th>Exterior1st  </th><th>Exterior2nd  </th><th>MasVnrType  </th><th style=\"text-align: right;\">  MasVnrArea</th><th>ExterQual  </th><th>ExterCond  </th><th>Foundation  </th><th>BsmtQual  </th><th>BsmtCond  </th><th>BsmtExposure  </th><th>BsmtFinType1  </th><th style=\"text-align: right;\">  BsmtFinSF1</th><th>BsmtFinType2  </th><th style=\"text-align: right;\">  BsmtFinSF2</th><th style=\"text-align: right;\">  BsmtUnfSF</th><th style=\"text-align: right;\">  TotalBsmtSF</th><th>Heating  </th><th>HeatingQC  </th><th>CentralAir  </th><th>Electrical  </th><th style=\"text-align: right;\">  1stFlrSF</th><th style=\"text-align: right;\">  2ndFlrSF</th><th style=\"text-align: right;\">  LowQualFinSF</th><th style=\"text-align: right;\">  GrLivArea</th><th style=\"text-align: right;\">  BsmtFullBath</th><th style=\"text-align: right;\">  BsmtHalfBath</th><th style=\"text-align: right;\">  FullBath</th><th style=\"text-align: right;\">  HalfBath</th><th style=\"text-align: right;\">  BedroomAbvGr</th><th style=\"text-align: right;\">  KitchenAbvGr</th><th>KitchenQual  </th><th style=\"text-align: right;\">  TotRmsAbvGrd</th><th>Functional  </th><th style=\"text-align: right;\">  Fireplaces</th><th>FireplaceQu  </th><th>GarageType  </th><th style=\"text-align: right;\">  GarageYrBlt</th><th>GarageFinish  </th><th style=\"text-align: right;\">  GarageCars</th><th style=\"text-align: right;\">  GarageArea</th><th>GarageQual  </th><th>GarageCond  </th><th>PavedDrive  </th><th style=\"text-align: right;\">  WoodDeckSF</th><th style=\"text-align: right;\">  OpenPorchSF</th><th style=\"text-align: right;\">  EnclosedPorch</th><th style=\"text-align: right;\">  3SsnPorch</th><th style=\"text-align: right;\">  ScreenPorch</th><th style=\"text-align: right;\">  PoolArea</th><th>PoolQC  </th><th>Fence  </th><th>MiscFeature  </th><th style=\"text-align: right;\">  MiscVal</th><th style=\"text-align: right;\">  MoSold</th><th style=\"text-align: right;\">  YrSold</th><th>SaleType  </th><th>SaleCondition  </th><th style=\"text-align: right;\">  SalePrice</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td style=\"text-align: right;\">   1</td><td style=\"text-align: right;\">          60</td><td>RL        </td><td style=\"text-align: right;\">           65</td><td style=\"text-align: right;\">     8450</td><td>Pave    </td><td>NA     </td><td>Reg       </td><td>Lvl          </td><td>AllPub     </td><td>Inside     </td><td>Gtl        </td><td>CollgCr       </td><td>Norm        </td><td>Norm        </td><td>1Fam      </td><td>2Story      </td><td style=\"text-align: right;\">            7</td><td style=\"text-align: right;\">            5</td><td style=\"text-align: right;\">       2003</td><td style=\"text-align: right;\">          2003</td><td>Gable      </td><td>CompShg   </td><td>VinylSd      </td><td>VinylSd      </td><td>BrkFace     </td><td style=\"text-align: right;\">         196</td><td>Gd         </td><td>TA         </td><td>PConc       </td><td>Gd        </td><td>TA        </td><td>No            </td><td>GLQ           </td><td style=\"text-align: right;\">         706</td><td>Unf           </td><td style=\"text-align: right;\">           0</td><td style=\"text-align: right;\">        150</td><td style=\"text-align: right;\">          856</td><td>GasA     </td><td>Ex         </td><td>Y           </td><td>SBrkr       </td><td style=\"text-align: right;\">       856</td><td style=\"text-align: right;\">       854</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">       1710</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">         2</td><td style=\"text-align: right;\">         1</td><td style=\"text-align: right;\">             3</td><td style=\"text-align: right;\">             1</td><td>Gd           </td><td style=\"text-align: right;\">             8</td><td>Typ         </td><td style=\"text-align: right;\">           0</td><td>NA           </td><td>Attchd      </td><td style=\"text-align: right;\">         2003</td><td>RFn           </td><td style=\"text-align: right;\">           2</td><td style=\"text-align: right;\">         548</td><td>TA          </td><td>TA          </td><td>Y           </td><td style=\"text-align: right;\">           0</td><td style=\"text-align: right;\">           61</td><td style=\"text-align: right;\">              0</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">            0</td><td style=\"text-align: right;\">         0</td><td>NA      </td><td>NA     </td><td>NA           </td><td style=\"text-align: right;\">        0</td><td style=\"text-align: right;\">       2</td><td style=\"text-align: right;\">    2008</td><td>WD        </td><td>Normal         </td><td style=\"text-align: right;\">     208500</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   2</td><td style=\"text-align: right;\">          20</td><td>RL        </td><td style=\"text-align: right;\">           80</td><td style=\"text-align: right;\">     9600</td><td>Pave    </td><td>NA     </td><td>Reg       </td><td>Lvl          </td><td>AllPub     </td><td>FR2        </td><td>Gtl        </td><td>Veenker       </td><td>Feedr       </td><td>Norm        </td><td>1Fam      </td><td>1Story      </td><td style=\"text-align: right;\">            6</td><td style=\"text-align: right;\">            8</td><td style=\"text-align: right;\">       1976</td><td style=\"text-align: right;\">          1976</td><td>Gable      </td><td>CompShg   </td><td>MetalSd      </td><td>MetalSd      </td><td>None        </td><td style=\"text-align: right;\">           0</td><td>TA         </td><td>TA         </td><td>CBlock      </td><td>Gd        </td><td>TA        </td><td>Gd            </td><td>ALQ           </td><td style=\"text-align: right;\">         978</td><td>Unf           </td><td style=\"text-align: right;\">           0</td><td style=\"text-align: right;\">        284</td><td style=\"text-align: right;\">         1262</td><td>GasA     </td><td>Ex         </td><td>Y           </td><td>SBrkr       </td><td style=\"text-align: right;\">      1262</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">       1262</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">         2</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">             3</td><td style=\"text-align: right;\">             1</td><td>TA           </td><td style=\"text-align: right;\">             6</td><td>Typ         </td><td style=\"text-align: right;\">           1</td><td>TA           </td><td>Attchd      </td><td style=\"text-align: right;\">         1976</td><td>RFn           </td><td style=\"text-align: right;\">           2</td><td style=\"text-align: right;\">         460</td><td>TA          </td><td>TA          </td><td>Y           </td><td style=\"text-align: right;\">         298</td><td style=\"text-align: right;\">            0</td><td style=\"text-align: right;\">              0</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">            0</td><td style=\"text-align: right;\">         0</td><td>NA      </td><td>NA     </td><td>NA           </td><td style=\"text-align: right;\">        0</td><td style=\"text-align: right;\">       5</td><td style=\"text-align: right;\">    2007</td><td>WD        </td><td>Normal         </td><td style=\"text-align: right;\">     181500</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   3</td><td style=\"text-align: right;\">          60</td><td>RL        </td><td style=\"text-align: right;\">           68</td><td style=\"text-align: right;\">    11250</td><td>Pave    </td><td>NA     </td><td>IR1       </td><td>Lvl          </td><td>AllPub     </td><td>Inside     </td><td>Gtl        </td><td>CollgCr       </td><td>Norm        </td><td>Norm        </td><td>1Fam      </td><td>2Story      </td><td style=\"text-align: right;\">            7</td><td style=\"text-align: right;\">            5</td><td style=\"text-align: right;\">       2001</td><td style=\"text-align: right;\">          2002</td><td>Gable      </td><td>CompShg   </td><td>VinylSd      </td><td>VinylSd      </td><td>BrkFace     </td><td style=\"text-align: right;\">         162</td><td>Gd         </td><td>TA         </td><td>PConc       </td><td>Gd        </td><td>TA        </td><td>Mn            </td><td>GLQ           </td><td style=\"text-align: right;\">         486</td><td>Unf           </td><td style=\"text-align: right;\">           0</td><td style=\"text-align: right;\">        434</td><td style=\"text-align: right;\">          920</td><td>GasA     </td><td>Ex         </td><td>Y           </td><td>SBrkr       </td><td style=\"text-align: right;\">       920</td><td style=\"text-align: right;\">       866</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">       1786</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">         2</td><td style=\"text-align: right;\">         1</td><td style=\"text-align: right;\">             3</td><td style=\"text-align: right;\">             1</td><td>Gd           </td><td style=\"text-align: right;\">             6</td><td>Typ         </td><td style=\"text-align: right;\">           1</td><td>TA           </td><td>Attchd      </td><td style=\"text-align: right;\">         2001</td><td>RFn           </td><td style=\"text-align: right;\">           2</td><td style=\"text-align: right;\">         608</td><td>TA          </td><td>TA          </td><td>Y           </td><td style=\"text-align: right;\">           0</td><td style=\"text-align: right;\">           42</td><td style=\"text-align: right;\">              0</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">            0</td><td style=\"text-align: right;\">         0</td><td>NA      </td><td>NA     </td><td>NA           </td><td style=\"text-align: right;\">        0</td><td style=\"text-align: right;\">       9</td><td style=\"text-align: right;\">    2008</td><td>WD        </td><td>Normal         </td><td style=\"text-align: right;\">     223500</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   4</td><td style=\"text-align: right;\">          70</td><td>RL        </td><td style=\"text-align: right;\">           60</td><td style=\"text-align: right;\">     9550</td><td>Pave    </td><td>NA     </td><td>IR1       </td><td>Lvl          </td><td>AllPub     </td><td>Corner     </td><td>Gtl        </td><td>Crawfor       </td><td>Norm        </td><td>Norm        </td><td>1Fam      </td><td>2Story      </td><td style=\"text-align: right;\">            7</td><td style=\"text-align: right;\">            5</td><td style=\"text-align: right;\">       1915</td><td style=\"text-align: right;\">          1970</td><td>Gable      </td><td>CompShg   </td><td>Wd Sdng      </td><td>Wd Shng      </td><td>None        </td><td style=\"text-align: right;\">           0</td><td>TA         </td><td>TA         </td><td>BrkTil      </td><td>TA        </td><td>Gd        </td><td>No            </td><td>ALQ           </td><td style=\"text-align: right;\">         216</td><td>Unf           </td><td style=\"text-align: right;\">           0</td><td style=\"text-align: right;\">        540</td><td style=\"text-align: right;\">          756</td><td>GasA     </td><td>Gd         </td><td>Y           </td><td>SBrkr       </td><td style=\"text-align: right;\">       961</td><td style=\"text-align: right;\">       756</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">       1717</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">         1</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">             3</td><td style=\"text-align: right;\">             1</td><td>Gd           </td><td style=\"text-align: right;\">             7</td><td>Typ         </td><td style=\"text-align: right;\">           1</td><td>Gd           </td><td>Detchd      </td><td style=\"text-align: right;\">         1998</td><td>Unf           </td><td style=\"text-align: right;\">           3</td><td style=\"text-align: right;\">         642</td><td>TA          </td><td>TA          </td><td>Y           </td><td style=\"text-align: right;\">           0</td><td style=\"text-align: right;\">           35</td><td style=\"text-align: right;\">            272</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">            0</td><td style=\"text-align: right;\">         0</td><td>NA      </td><td>NA     </td><td>NA           </td><td style=\"text-align: right;\">        0</td><td style=\"text-align: right;\">       2</td><td style=\"text-align: right;\">    2006</td><td>WD        </td><td>Abnorml        </td><td style=\"text-align: right;\">     140000</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   5</td><td style=\"text-align: right;\">          60</td><td>RL        </td><td style=\"text-align: right;\">           84</td><td style=\"text-align: right;\">    14260</td><td>Pave    </td><td>NA     </td><td>IR1       </td><td>Lvl          </td><td>AllPub     </td><td>FR2        </td><td>Gtl        </td><td>NoRidge       </td><td>Norm        </td><td>Norm        </td><td>1Fam      </td><td>2Story      </td><td style=\"text-align: right;\">            8</td><td style=\"text-align: right;\">            5</td><td style=\"text-align: right;\">       2000</td><td style=\"text-align: right;\">          2000</td><td>Gable      </td><td>CompShg   </td><td>VinylSd      </td><td>VinylSd      </td><td>BrkFace     </td><td style=\"text-align: right;\">         350</td><td>Gd         </td><td>TA         </td><td>PConc       </td><td>Gd        </td><td>TA        </td><td>Av            </td><td>GLQ           </td><td style=\"text-align: right;\">         655</td><td>Unf           </td><td style=\"text-align: right;\">           0</td><td style=\"text-align: right;\">        490</td><td style=\"text-align: right;\">         1145</td><td>GasA     </td><td>Ex         </td><td>Y           </td><td>SBrkr       </td><td style=\"text-align: right;\">      1145</td><td style=\"text-align: right;\">      1053</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">       2198</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">         2</td><td style=\"text-align: right;\">         1</td><td style=\"text-align: right;\">             4</td><td style=\"text-align: right;\">             1</td><td>Gd           </td><td style=\"text-align: right;\">             9</td><td>Typ         </td><td style=\"text-align: right;\">           1</td><td>TA           </td><td>Attchd      </td><td style=\"text-align: right;\">         2000</td><td>RFn           </td><td style=\"text-align: right;\">           3</td><td style=\"text-align: right;\">         836</td><td>TA          </td><td>TA          </td><td>Y           </td><td style=\"text-align: right;\">         192</td><td style=\"text-align: right;\">           84</td><td style=\"text-align: right;\">              0</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">            0</td><td style=\"text-align: right;\">         0</td><td>NA      </td><td>NA     </td><td>NA           </td><td style=\"text-align: right;\">        0</td><td style=\"text-align: right;\">      12</td><td style=\"text-align: right;\">    2008</td><td>WD        </td><td>Normal         </td><td style=\"text-align: right;\">     250000</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   6</td><td style=\"text-align: right;\">          50</td><td>RL        </td><td style=\"text-align: right;\">           85</td><td style=\"text-align: right;\">    14115</td><td>Pave    </td><td>NA     </td><td>IR1       </td><td>Lvl          </td><td>AllPub     </td><td>Inside     </td><td>Gtl        </td><td>Mitchel       </td><td>Norm        </td><td>Norm        </td><td>1Fam      </td><td>1.5Fin      </td><td style=\"text-align: right;\">            5</td><td style=\"text-align: right;\">            5</td><td style=\"text-align: right;\">       1993</td><td style=\"text-align: right;\">          1995</td><td>Gable      </td><td>CompShg   </td><td>VinylSd      </td><td>VinylSd      </td><td>None        </td><td style=\"text-align: right;\">           0</td><td>TA         </td><td>TA         </td><td>Wood        </td><td>Gd        </td><td>TA        </td><td>No            </td><td>GLQ           </td><td style=\"text-align: right;\">         732</td><td>Unf           </td><td style=\"text-align: right;\">           0</td><td style=\"text-align: right;\">         64</td><td style=\"text-align: right;\">          796</td><td>GasA     </td><td>Ex         </td><td>Y           </td><td>SBrkr       </td><td style=\"text-align: right;\">       796</td><td style=\"text-align: right;\">       566</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">       1362</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">         1</td><td style=\"text-align: right;\">         1</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             1</td><td>TA           </td><td style=\"text-align: right;\">             5</td><td>Typ         </td><td style=\"text-align: right;\">           0</td><td>NA           </td><td>Attchd      </td><td style=\"text-align: right;\">         1993</td><td>Unf           </td><td style=\"text-align: right;\">           2</td><td style=\"text-align: right;\">         480</td><td>TA          </td><td>TA          </td><td>Y           </td><td style=\"text-align: right;\">          40</td><td style=\"text-align: right;\">           30</td><td style=\"text-align: right;\">              0</td><td style=\"text-align: right;\">        320</td><td style=\"text-align: right;\">            0</td><td style=\"text-align: right;\">         0</td><td>NA      </td><td>MnPrv  </td><td>Shed         </td><td style=\"text-align: right;\">      700</td><td style=\"text-align: right;\">      10</td><td style=\"text-align: right;\">    2009</td><td>WD        </td><td>Normal         </td><td style=\"text-align: right;\">     143000</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   7</td><td style=\"text-align: right;\">          20</td><td>RL        </td><td style=\"text-align: right;\">           75</td><td style=\"text-align: right;\">    10084</td><td>Pave    </td><td>NA     </td><td>Reg       </td><td>Lvl          </td><td>AllPub     </td><td>Inside     </td><td>Gtl        </td><td>Somerst       </td><td>Norm        </td><td>Norm        </td><td>1Fam      </td><td>1Story      </td><td style=\"text-align: right;\">            8</td><td style=\"text-align: right;\">            5</td><td style=\"text-align: right;\">       2004</td><td style=\"text-align: right;\">          2005</td><td>Gable      </td><td>CompShg   </td><td>VinylSd      </td><td>VinylSd      </td><td>Stone       </td><td style=\"text-align: right;\">         186</td><td>Gd         </td><td>TA         </td><td>PConc       </td><td>Ex        </td><td>TA        </td><td>Av            </td><td>GLQ           </td><td style=\"text-align: right;\">        1369</td><td>Unf           </td><td style=\"text-align: right;\">           0</td><td style=\"text-align: right;\">        317</td><td style=\"text-align: right;\">         1686</td><td>GasA     </td><td>Ex         </td><td>Y           </td><td>SBrkr       </td><td style=\"text-align: right;\">      1694</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">       1694</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">         2</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">             3</td><td style=\"text-align: right;\">             1</td><td>Gd           </td><td style=\"text-align: right;\">             7</td><td>Typ         </td><td style=\"text-align: right;\">           1</td><td>Gd           </td><td>Attchd      </td><td style=\"text-align: right;\">         2004</td><td>RFn           </td><td style=\"text-align: right;\">           2</td><td style=\"text-align: right;\">         636</td><td>TA          </td><td>TA          </td><td>Y           </td><td style=\"text-align: right;\">         255</td><td style=\"text-align: right;\">           57</td><td style=\"text-align: right;\">              0</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">            0</td><td style=\"text-align: right;\">         0</td><td>NA      </td><td>NA     </td><td>NA           </td><td style=\"text-align: right;\">        0</td><td style=\"text-align: right;\">       8</td><td style=\"text-align: right;\">    2007</td><td>WD        </td><td>Normal         </td><td style=\"text-align: right;\">     307000</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   8</td><td style=\"text-align: right;\">          60</td><td>RL        </td><td style=\"text-align: right;\">          nan</td><td style=\"text-align: right;\">    10382</td><td>Pave    </td><td>NA     </td><td>IR1       </td><td>Lvl          </td><td>AllPub     </td><td>Corner     </td><td>Gtl        </td><td>NWAmes        </td><td>PosN        </td><td>Norm        </td><td>1Fam      </td><td>2Story      </td><td style=\"text-align: right;\">            7</td><td style=\"text-align: right;\">            6</td><td style=\"text-align: right;\">       1973</td><td style=\"text-align: right;\">          1973</td><td>Gable      </td><td>CompShg   </td><td>HdBoard      </td><td>HdBoard      </td><td>Stone       </td><td style=\"text-align: right;\">         240</td><td>TA         </td><td>TA         </td><td>CBlock      </td><td>Gd        </td><td>TA        </td><td>Mn            </td><td>ALQ           </td><td style=\"text-align: right;\">         859</td><td>BLQ           </td><td style=\"text-align: right;\">          32</td><td style=\"text-align: right;\">        216</td><td style=\"text-align: right;\">         1107</td><td>GasA     </td><td>Ex         </td><td>Y           </td><td>SBrkr       </td><td style=\"text-align: right;\">      1107</td><td style=\"text-align: right;\">       983</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">       2090</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">         2</td><td style=\"text-align: right;\">         1</td><td style=\"text-align: right;\">             3</td><td style=\"text-align: right;\">             1</td><td>TA           </td><td style=\"text-align: right;\">             7</td><td>Typ         </td><td style=\"text-align: right;\">           2</td><td>TA           </td><td>Attchd      </td><td style=\"text-align: right;\">         1973</td><td>RFn           </td><td style=\"text-align: right;\">           2</td><td style=\"text-align: right;\">         484</td><td>TA          </td><td>TA          </td><td>Y           </td><td style=\"text-align: right;\">         235</td><td style=\"text-align: right;\">          204</td><td style=\"text-align: right;\">            228</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">            0</td><td style=\"text-align: right;\">         0</td><td>NA      </td><td>NA     </td><td>Shed         </td><td style=\"text-align: right;\">      350</td><td style=\"text-align: right;\">      11</td><td style=\"text-align: right;\">    2009</td><td>WD        </td><td>Normal         </td><td style=\"text-align: right;\">     200000</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   9</td><td style=\"text-align: right;\">          50</td><td>RM        </td><td style=\"text-align: right;\">           51</td><td style=\"text-align: right;\">     6120</td><td>Pave    </td><td>NA     </td><td>Reg       </td><td>Lvl          </td><td>AllPub     </td><td>Inside     </td><td>Gtl        </td><td>OldTown       </td><td>Artery      </td><td>Norm        </td><td>1Fam      </td><td>1.5Fin      </td><td style=\"text-align: right;\">            7</td><td style=\"text-align: right;\">            5</td><td style=\"text-align: right;\">       1931</td><td style=\"text-align: right;\">          1950</td><td>Gable      </td><td>CompShg   </td><td>BrkFace      </td><td>Wd Shng      </td><td>None        </td><td style=\"text-align: right;\">           0</td><td>TA         </td><td>TA         </td><td>BrkTil      </td><td>TA        </td><td>TA        </td><td>No            </td><td>Unf           </td><td style=\"text-align: right;\">           0</td><td>Unf           </td><td style=\"text-align: right;\">           0</td><td style=\"text-align: right;\">        952</td><td style=\"text-align: right;\">          952</td><td>GasA     </td><td>Gd         </td><td>Y           </td><td>FuseF       </td><td style=\"text-align: right;\">      1022</td><td style=\"text-align: right;\">       752</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">       1774</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">         2</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">             2</td><td style=\"text-align: right;\">             2</td><td>TA           </td><td style=\"text-align: right;\">             8</td><td>Min1        </td><td style=\"text-align: right;\">           2</td><td>TA           </td><td>Detchd      </td><td style=\"text-align: right;\">         1931</td><td>Unf           </td><td style=\"text-align: right;\">           2</td><td style=\"text-align: right;\">         468</td><td>Fa          </td><td>TA          </td><td>Y           </td><td style=\"text-align: right;\">          90</td><td style=\"text-align: right;\">            0</td><td style=\"text-align: right;\">            205</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">            0</td><td style=\"text-align: right;\">         0</td><td>NA      </td><td>NA     </td><td>NA           </td><td style=\"text-align: right;\">        0</td><td style=\"text-align: right;\">       4</td><td style=\"text-align: right;\">    2008</td><td>WD        </td><td>Abnorml        </td><td style=\"text-align: right;\">     129900</td></tr>\n",
       "<tr><td style=\"text-align: right;\">  10</td><td style=\"text-align: right;\">         190</td><td>RL        </td><td style=\"text-align: right;\">           50</td><td style=\"text-align: right;\">     7420</td><td>Pave    </td><td>NA     </td><td>Reg       </td><td>Lvl          </td><td>AllPub     </td><td>Corner     </td><td>Gtl        </td><td>BrkSide       </td><td>Artery      </td><td>Artery      </td><td>2fmCon    </td><td>1.5Unf      </td><td style=\"text-align: right;\">            5</td><td style=\"text-align: right;\">            6</td><td style=\"text-align: right;\">       1939</td><td style=\"text-align: right;\">          1950</td><td>Gable      </td><td>CompShg   </td><td>MetalSd      </td><td>MetalSd      </td><td>None        </td><td style=\"text-align: right;\">           0</td><td>TA         </td><td>TA         </td><td>BrkTil      </td><td>TA        </td><td>TA        </td><td>No            </td><td>GLQ           </td><td style=\"text-align: right;\">         851</td><td>Unf           </td><td style=\"text-align: right;\">           0</td><td style=\"text-align: right;\">        140</td><td style=\"text-align: right;\">          991</td><td>GasA     </td><td>Ex         </td><td>Y           </td><td>SBrkr       </td><td style=\"text-align: right;\">      1077</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">       1077</td><td style=\"text-align: right;\">             1</td><td style=\"text-align: right;\">             0</td><td style=\"text-align: right;\">         1</td><td style=\"text-align: right;\">         0</td><td style=\"text-align: right;\">             2</td><td style=\"text-align: right;\">             2</td><td>TA           </td><td style=\"text-align: right;\">             5</td><td>Typ         </td><td style=\"text-align: right;\">           2</td><td>TA           </td><td>Attchd      </td><td style=\"text-align: right;\">         1939</td><td>RFn           </td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">         205</td><td>Gd          </td><td>TA          </td><td>Y           </td><td style=\"text-align: right;\">           0</td><td style=\"text-align: right;\">            4</td><td style=\"text-align: right;\">              0</td><td style=\"text-align: right;\">          0</td><td style=\"text-align: right;\">            0</td><td style=\"text-align: right;\">         0</td><td>NA      </td><td>NA     </td><td>NA           </td><td style=\"text-align: right;\">        0</td><td style=\"text-align: right;\">       1</td><td style=\"text-align: right;\">    2008</td><td>WD        </td><td>Normal         </td><td style=\"text-align: right;\">     118000</td></tr>\n",
       "</tbody>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-08-27T03:39:05.847606Z",
     "start_time": "2021-08-27T03:39:05.843782Z"
    }
   },
   "outputs": [],
   "source": [
    "y = \"SalePrice\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-08-27T05:06:49.541474Z",
     "start_time": "2021-08-27T03:39:12.629264Z"
    },
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AutoML progress: |████████████████████████████████████████████████████████| 100%\n"
     ]
    }
   ],
   "source": [
    "aml2 = H2OAutoML(max_runtime_secs = 7200, seed = 1, project_name = \"kaggle_house_price\")\n",
    "aml2.train(y = y, training_frame = train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-08-27T05:06:49.939508Z",
     "start_time": "2021-08-27T05:06:49.658017Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gbm prediction progress: |████████████████████████████████████████████████| 100%\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/caihengxing/anaconda3/lib/python3.7/site-packages/h2o/job.py:72: UserWarning: Test/Validation dataset column 'MSZoning' has levels not trained on: [\"NA\"]\n",
      "  warnings.warn(w)\n",
      "/home/caihengxing/anaconda3/lib/python3.7/site-packages/h2o/job.py:72: UserWarning: Test/Validation dataset column 'Utilities' has levels not trained on: [\"NA\"]\n",
      "  warnings.warn(w)\n",
      "/home/caihengxing/anaconda3/lib/python3.7/site-packages/h2o/job.py:72: UserWarning: Test/Validation dataset column 'Exterior1st' has levels not trained on: [\"NA\"]\n",
      "  warnings.warn(w)\n",
      "/home/caihengxing/anaconda3/lib/python3.7/site-packages/h2o/job.py:72: UserWarning: Test/Validation dataset column 'Exterior2nd' has levels not trained on: [\"NA\"]\n",
      "  warnings.warn(w)\n",
      "/home/caihengxing/anaconda3/lib/python3.7/site-packages/h2o/job.py:72: UserWarning: Test/Validation dataset column 'KitchenQual' has levels not trained on: [\"NA\"]\n",
      "  warnings.warn(w)\n",
      "/home/caihengxing/anaconda3/lib/python3.7/site-packages/h2o/job.py:72: UserWarning: Test/Validation dataset column 'Functional' has levels not trained on: [\"NA\"]\n",
      "  warnings.warn(w)\n",
      "/home/caihengxing/anaconda3/lib/python3.7/site-packages/h2o/job.py:72: UserWarning: Test/Validation dataset column 'SaleType' has levels not trained on: [\"NA\"]\n",
      "  warnings.warn(w)\n"
     ]
    }
   ],
   "source": [
    "pred = aml2.predict(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-08-27T05:06:50.307479Z",
     "start_time": "2021-08-27T05:06:50.067259Z"
    }
   },
   "outputs": [],
   "source": [
    "sub = pd.read_csv(test_path)\n",
    "sub = sub[['Id']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-08-27T06:23:28.383395Z",
     "start_time": "2021-08-27T06:23:28.325080Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "pred = h2o.as_list(pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-08-27T06:23:50.243261Z",
     "start_time": "2021-08-27T06:23:50.143855Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "sub[y] = pred['predict']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-08-27T06:23:53.088718Z",
     "start_time": "2021-08-27T06:23:53.025021Z"
    }
   },
   "outputs": [],
   "source": [
    "sub.to_csv(\"./h2o_sub_kaggle_house_price.csv\", index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
